Run download_data.Rmd and percentage_of_regional_richness.Rmd First!
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
library(reshape2)
library(rpart)
library(ggplot2)
Attaching package: ‘ggplot2’
The following object is masked from ‘package:randomForest’:
margin
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ tibble 3.1.2 ✓ dplyr 1.0.7
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
✓ purrr 0.3.4
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::combine() masks randomForest::combine()
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x ggplot2::margin() masks randomForest::margin()
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(car)
Loading required package: carData
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'car':
method from
influence.merMod lme4
cooks.distance.influence.merMod lme4
dfbeta.influence.merMod lme4
dfbetas.influence.merMod lme4
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
city_data
# Number of cities with a non-missing GDP-per-population value.
sum(!is.na(city_data$city_gdp_per_population))
[1] 30
# Number of cities with a non-missing open-public-space percentage.
sum(!is.na(city_data$percentage_urban_area_as_open_public_spaces))
[1] 61
# Number of cities with a non-missing happiness_future_life value.
sum(!is.na(city_data$happiness_future_life))
[1] 65
# Number of cities with a non-missing PM2.5 exposure value.
sum(!is.na(city_data$mean_population_exposure_to_pm2_5_2019))
[1] 131
# Read the per-city results for one pool and join them onto `city_data`.
#
# The results file is looked up in the working directory as
# "percentage_of_regional_richness__output__<pool_name>_city_richness_intercept.csv".
# `city_data` is read from the calling environment (it is not defined here).
#
# pool_name:         pool label used in the results file name
#                    (this notebook uses 'merlin' and 'birdlife').
# include_city_name: when TRUE the `name` column is kept, placed directly
#                    after `response`; defaults to FALSE.
#
# Returns the joined table restricted to `response`, optionally `name`, and
# the predictor columns listed below, in that order.
fetch_city_data_for <- function(pool_name, include_city_name = FALSE) {
  # Same file name the nested paste() calls produced (note the double
  # underscore before the pool name).
  results_filename <- paste0(
    "percentage_of_regional_richness__output__",
    pool_name,
    "_city_richness_intercept.csv"
  )
  results <- read_csv(results_filename)

  # Join on the city name explicitly: the implicit natural join would silently
  # change its key if the two tables ever shared another column.
  joined <- left_join(city_data, results, by = "name")

  required_columns <- c(
    "population_growth",
    "rainfall_monthly_min", "rainfall_annual_average", "rainfall_monthly_max",
    "temperature_annual_average", "temperature_monthly_min", "temperature_monthly_max",
    "happiness_negative_effect", "happiness_positive_effect", "happiness_future_life",
    "number_of_biomes", "realm", "biome_name",
    "region_20km_includes_estuary", "region_50km_includes_estuary", "region_100km_includes_estuary",
    "city_includes_estuary",
    "region_20km_average_pop_density", "region_50km_average_pop_density", "region_100km_average_pop_density",
    "city_max_pop_density", "city_average_pop_density",
    "mean_population_exposure_to_pm2_5_2019",
    "region_20km_cultivated", "region_20km_urban",
    "region_50km_cultivated", "region_50km_urban",
    "region_100km_cultivated", "region_100km_urban",
    "region_20km_elevation_delta", "region_20km_mean_elevation",
    "region_50km_elevation_delta", "region_50km_mean_elevation",
    "region_100km_elevation_delta", "region_100km_mean_elevation",
    "city_elevation_delta", "city_mean_elevation",
    "urban", "shrubs", "permanent_water", "open_forest", "herbaceous_wetland",
    "herbaceous_vegetation", "cultivated", "closed_forest",
    "share_of_population_within_400m_of_open_space",
    "percentage_urban_area_as_streets",
    "percentage_urban_area_as_open_public_spaces_and_streets",
    "percentage_urban_area_as_open_public_spaces",
    "city_gdp_per_population",
    "city_ndvi", "city_ssm", "city_susm",
    "region_20km_ndvi", "region_20km_ssm", "region_20km_susm",
    "region_50km_ndvi", "region_50km_ssm", "region_50km_susm",
    "region_100km_ndvi", "region_100km_ssm", "region_100km_susm",
    "city_percentage_protected", "region_20km_percentage_protected",
    "region_50km_percentage_protected", "region_100km_percentage_protected"
  )
  if (include_city_name) {
    required_columns <- c("name", required_columns)
  }
  required_columns <- c("response", required_columns)

  # Base `[` is used for the column subset; MASS masks dplyr::select in this
  # session, so select() would not be the dplyr verb here.
  joined[, required_columns]
}
merlin_city_data <- fetch_city_data_for('merlin')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
merlin_city_data
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.14 117.27 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.15 117.32 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.42 118.82 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.36 118.50 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.47 119.09 |
merlin_city_data_fixed
ggplot(merlin_city_data_fixed, aes(response)) + geom_histogram(binwidth = 2)
source('./helper__random_forest_selection_functions.R')
# Build the column name "region_<scale>km_<postscript>",
# e.g. scale 20 and postscript "urban" give "region_20km_urban".
scale_parameter_name <- function(scale, postscript) {
  paste0("region_", scale, "km_", postscript)
}
# Column names for `postscript` at the three regional scales,
# in the order 20 km, 50 km, 100 km.
scale_parameters <- function(postscript) {
  regional_scales_km <- c(20, 50, 100)
  unlist(lapply(regional_scales_km, scale_parameter_name, postscript = postscript))
}
# Regional column names for `postscript`, minus the one at `scale_to_exclude`.
# A scale that is not 20, 50 or 100 (including NA, which becomes
# "region_NAkm_<postscript>") matches nothing, so all three names are returned.
scales_parameters_without <- function(scale_to_exclude, postscript) {
  excluded_column <- scale_parameter_name(scale_to_exclude, postscript)
  candidate_columns <- scale_parameters(postscript)
  candidate_columns[candidate_columns != excluded_column]
}
# For each regional variable, list the columns at every scale other than the
# one passed for that variable (20, 50 or 100; NA keeps all three scales in
# the list). The result is one character vector, ordered: urban, cultivated,
# elevation_delta, mean_elevation, average_pop_density, includes_estuary,
# ssm, susm, ndvi, percentage_protected.
select_scales <- function(urban, cultivated, elevation_delta, mean_elevation, average_pop_density, includes_estuary, ssm, susm, ndvi, percentage_protected) {
  c(
    scales_parameters_without(scale_to_exclude = urban, postscript = "urban"),
    scales_parameters_without(scale_to_exclude = cultivated, postscript = "cultivated"),
    scales_parameters_without(scale_to_exclude = elevation_delta, postscript = "elevation_delta"),
    scales_parameters_without(scale_to_exclude = mean_elevation, postscript = "mean_elevation"),
    scales_parameters_without(scale_to_exclude = average_pop_density, postscript = "average_pop_density"),
    scales_parameters_without(scale_to_exclude = includes_estuary, postscript = "includes_estuary"),
    scales_parameters_without(scale_to_exclude = ssm, postscript = "ssm"),
    scales_parameters_without(scale_to_exclude = susm, postscript = "susm"),
    scales_parameters_without(scale_to_exclude = ndvi, postscript = "ndvi"),
    scales_parameters_without(scale_to_exclude = percentage_protected, postscript = "percentage_protected")
  )
}
select_scales(urban = 20, cultivated = 100, elevation_delta = 20, mean_elevation = 100, average_pop_density = NA, includes_estuary = NA, ssm = 20, susm = 20, ndvi = 100, percentage_protected = NA)
[1] "region_50km_urban" "region_100km_urban" "region_20km_cultivated" "region_50km_cultivated" "region_50km_elevation_delta"
[6] "region_100km_elevation_delta" "region_20km_mean_elevation" "region_50km_mean_elevation" "region_20km_average_pop_density" "region_50km_average_pop_density"
[11] "region_100km_average_pop_density" "region_20km_includes_estuary" "region_50km_includes_estuary" "region_100km_includes_estuary" "region_50km_ssm"
[16] "region_100km_ssm" "region_50km_susm" "region_100km_susm" "region_20km_ndvi" "region_50km_ndvi"
[21] "region_20km_percentage_protected" "region_50km_percentage_protected" "region_100km_percentage_protected"
select_scales(urban = , cultivated = , elevation_delta = , mean_elevation = , average_pop_density = , includes_estuary = , ssm = , susm = , ndvi =, percentage_protected = )
select_variables_from_random_forest(merlin_city_data_fixed)
[1] "region_50km_ssm" "biome_name" "region_100km_ssm"
[4] "region_50km_elevation_delta" "region_20km_elevation_delta" "permanent_water"
[7] "region_20km_ssm" "region_20km_urban" "region_100km_elevation_delta"
[10] "shrubs" "city_ndvi" "city_gdp_per_population"
[13] "region_50km_urban" "region_20km_cultivated" "temperature_annual_average"
[16] "region_50km_cultivated" "region_100km_cultivated" "temperature_monthly_min"
[19] "region_100km_susm" "herbaceous_wetland" "region_50km_average_pop_density"
[22] "region_50km_ndvi" "city_max_pop_density" "region_100km_average_pop_density"
[25] "temperature_monthly_max" "happiness_positive_effect" "region_100km_urban"
[28] "realm" "city_average_pop_density" "region_20km_average_pop_density"
[31] "region_50km_percentage_protected" "region_100km_percentage_protected" "city_ssm"
[34] "city_elevation_delta" "city_mean_elevation" "region_50km_susm"
[37] "region_20km_mean_elevation" "region_20km_percentage_protected" "herbaceous_vegetation"
[40] "urban" "rainfall_monthly_max" "city_percentage_protected"
[43] "cultivated" "region_100km_mean_elevation" "region_20km_susm"
[46] "city_susm" "rainfall_annual_average" "population_growth"
[49] "happiness_negative_effect" "rainfall_monthly_min" "share_of_population_within_400m_of_open_space"
[52] "region_100km_ndvi" "region_50km_mean_elevation" "percentage_urban_area_as_open_public_spaces_and_streets"
[55] "open_forest" "region_20km_ndvi" "percentage_urban_area_as_streets"
[58] "closed_forest" "percentage_urban_area_as_open_public_spaces"
select_variables_from_random_forest(merlin_city_data_fixed_single_scale)
[1] "region_50km_ssm" "region_50km_elevation_delta" "biome_name"
[4] "city_gdp_per_population" "permanent_water" "city_ndvi"
[7] "temperature_annual_average" "region_20km_cultivated" "shrubs"
[10] "temperature_monthly_min" "region_20km_urban" "herbaceous_wetland"
[13] "city_max_pop_density" "city_ssm" "region_50km_average_pop_density"
[16] "city_average_pop_density" "realm" "temperature_monthly_max"
[19] "happiness_positive_effect" "region_50km_percentage_protected" "rainfall_monthly_max"
[22] "city_mean_elevation" "region_100km_susm" "city_percentage_protected"
[25] "cultivated" "happiness_future_life" "rainfall_annual_average"
[28] "urban" "happiness_negative_effect" "region_20km_mean_elevation"
[31] "share_of_population_within_400m_of_open_space" "rainfall_monthly_min" "population_growth"
[34] "region_50km_ndvi" "open_forest" "percentage_urban_area_as_open_public_spaces_and_streets"
[37] "percentage_urban_area_as_open_public_spaces" "closed_forest" "percentage_urban_area_as_streets"
[40] "city_susm"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm")])
[1] "Mean 25.1327179439473 , SD: 0.216956545963824 , Mean + SD: 25.3496744899111"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta")])
[1] "Mean 19.6242084196811 , SD: 0.282975547429027 , Mean + SD: 19.9071839671101"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name")])
[1] "Mean 19.2510597598754 , SD: 0.237378636197946 , Mean + SD: 19.4884383960734"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population")])
[1] "Mean 17.5667371783479 , SD: 0.225378699860072 , Mean + SD: 17.792115878208"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water")])
[1] "Mean 17.764917887126 , SD: 0.253600901946159 , Mean + SD: 18.0185187890721"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi")])
[1] "Mean 18.1713060031583 , SD: 0.274695354565878 , Mean + SD: 18.4460013577242"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average")])
[1] "Mean 18.4997318566786 , SD: 0.236978359007798 , Mean + SD: 18.7367102156864"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated")])
[1] "Mean 18.151366275243 , SD: 0.310327391925745 , Mean + SD: 18.4616936671687"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs")])
[1] "Mean 18.1996679903146 , SD: 0.32269285565908 , Mean + SD: 18.5223608459736"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min")])
[1] "Mean 18.3993943899942 , SD: 0.305819135869039 , Mean + SD: 18.7052135258632"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban")])
[1] "Mean 18.2983031433913 , SD: 0.27248663139474 , Mean + SD: 18.570789774786"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland")])
[1] "Mean 18.4617430190922 , SD: 0.256208381412962 , Mean + SD: 18.7179514005052"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density")])
[1] "Mean 18.3158359875326 , SD: 0.270120208103265 , Mean + SD: 18.5859561956359"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm")])
[1] "Mean 18.6428753216302 , SD: 0.26630095939741 , Mean + SD: 18.9091762810276"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density")])
[1] "Mean 18.8052699642646 , SD: 0.246919293071592 , Mean + SD: 19.0521892573362"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density")])
[1] "Mean 19.0500779913321 , SD: 0.281562617871374 , Mean + SD: 19.3316406092035"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density", "realm")])
[1] "Mean 18.9638490604482 , SD: 0.304880439343029 , Mean + SD: 19.2687294997913"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density", "realm", "temperature_monthly_max")])
[1] "Mean 19.1737445329045 , SD: 0.30237720581189 , Mean + SD: 19.4761217387164"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density", "realm", "temperature_monthly_max", "happiness_positive_effect")])
[1] "Mean 19.2237641823427 , SD: 0.305017487786321 , Mean + SD: 19.5287816701291"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density", "realm", "temperature_monthly_max", "happiness_positive_effect", "region_50km_percentage_protected")])
[1] "Mean 19.2731318874597 , SD: 0.293953451788727 , Mean + SD: 19.5670853392485"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "region_50km_ssm", "region_50km_elevation_delta", "biome_name", "city_gdp_per_population", "permanent_water", "city_ndvi", "temperature_annual_average", "region_20km_cultivated", "shrubs", "temperature_monthly_min", "region_20km_urban", "herbaceous_wetland", "city_max_pop_density", "city_ssm", "region_50km_average_pop_density", "city_average_pop_density", "realm", "temperature_monthly_max", "happiness_positive_effect", "region_50km_percentage_protected", "rainfall_monthly_max")])
[1] "Mean 19.3829197721043 , SD: 0.293734015304046 , Mean + SD: 19.6766537874083"
Selected Merlin predictors (the four-variable model has the lowest mean OOB error, 17.57): “region_50km_ssm”, “region_50km_elevation_delta”, “biome_name”, “city_gdp_per_population”
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.641 89.30 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.686 90.01 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.74 90.87 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.665 89.67 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.797 91.77 |
birdlife_city_data_fixed
select_variables_from_random_forest(birdlife_city_data_fixed)
[1] "population_growth" "region_50km_ssm" "region_100km_ssm"
[4] "city_ndvi" "region_100km_cultivated" "region_50km_cultivated"
[7] "region_20km_ssm" "region_100km_susm" "region_20km_susm"
[10] "rainfall_monthly_max" "biome_name" "permanent_water"
[13] "temperature_monthly_min" "region_50km_susm" "region_20km_average_pop_density"
[16] "rainfall_monthly_min" "city_ssm" "region_50km_ndvi"
[19] "region_100km_ndvi" "region_20km_ndvi" "percentage_urban_area_as_open_public_spaces_and_streets"
[22] "share_of_population_within_400m_of_open_space" "region_50km_average_pop_density" "percentage_urban_area_as_open_public_spaces"
[25] "mean_population_exposure_to_pm2_5_2019" "region_20km_cultivated" "city_average_pop_density"
[28] "region_100km_average_pop_density" "region_100km_urban" "temperature_annual_average"
[31] "region_20km_elevation_delta" "percentage_urban_area_as_streets" "rainfall_annual_average"
[34] "city_susm" "realm" "region_50km_elevation_delta"
[37] "shrubs" "region_20km_urban" "happiness_future_life"
[40] "region_100km_percentage_protected" "city_max_pop_density" "city_elevation_delta"
[43] "region_100km_mean_elevation" "happiness_positive_effect" "region_20km_percentage_protected"
[46] "region_50km_urban" "region_50km_percentage_protected" "region_50km_mean_elevation"
[49] "city_mean_elevation" "closed_forest" "herbaceous_wetland"
[52] "city_gdp_per_population" "urban" "region_20km_mean_elevation"
[55] "region_100km_elevation_delta" "open_forest" "herbaceous_vegetation"
[58] "city_percentage_protected" "cultivated" "happiness_negative_effect"
[61] "temperature_monthly_max"
select_variables_from_random_forest(birdlife_city_data_fixed_single_scale)
[1] "population_growth" "region_50km_ssm" "region_100km_cultivated"
[4] "city_ndvi" "biome_name" "region_100km_susm"
[7] "rainfall_monthly_min" "rainfall_monthly_max" "city_ssm"
[10] "region_20km_average_pop_density" "permanent_water" "temperature_monthly_min"
[13] "percentage_urban_area_as_open_public_spaces_and_streets" "region_50km_ndvi" "temperature_annual_average"
[16] "region_100km_urban" "region_20km_elevation_delta" "rainfall_annual_average"
[19] "share_of_population_within_400m_of_open_space" "percentage_urban_area_as_open_public_spaces" "mean_population_exposure_to_pm2_5_2019"
[22] "city_average_pop_density" "shrubs" "city_susm"
[25] "realm" "percentage_urban_area_as_streets" "city_max_pop_density"
[28] "city_elevation_delta" "city_gdp_per_population" "happiness_future_life"
[31] "open_forest" "happiness_positive_effect" "closed_forest"
[34] "urban" "city_mean_elevation" "cultivated"
[37] "temperature_monthly_max" "herbaceous_vegetation" "happiness_negative_effect"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth")])
[1] "Mean 6.35021321550458 , SD: 0.0593939328578979 , Mean + SD: 6.40960714836247"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm")])
[1] "Mean 4.84915699583042 , SD: 0.0818098257429198 , Mean + SD: 4.93096682157334"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated")])
[1] "Mean 5.28049378048091 , SD: 0.0775169354173377 , Mean + SD: 5.35801071589825"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi")])
[1] "Mean 5.12061471889155 , SD: 0.0874280154413976 , Mean + SD: 5.20804273433295"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name")])
[1] "Mean 5.15325796547024 , SD: 0.0694110168148363 , Mean + SD: 5.22266898228508"
# "city_ndvi" was listed twice in the original column vector; on a plain data.frame that
# selects the column twice (the copy is renamed city_ndvi.1), so the forest saw the same predictor twice.
# With the duplicate removed this step repeats the previous predictor set.
# TODO confirm: the rank-6 variable in the ranking printed above is "region_100km_susm" - was it intended here?
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name")])
[1] "Mean 5.2434172819644 , SD: 0.0806361112621407 , Mean + SD: 5.32405339322654"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density")])
[1] "Mean 5.24489256584117 , SD: 0.0766441089091058 , Mean + SD: 5.32153667475027"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water")])
[1] "Mean 5.04554202135548 , SD: 0.0836282380010259 , Mean + SD: 5.12917025935651"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min")])
[1] "Mean 5.12412938740328 , SD: 0.0730235792725426 , Mean + SD: 5.19715296667582"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets")])
[1] "Mean 5.13911115863577 , SD: 0.0955009377977672 , Mean + SD: 5.23461209643353"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi")])
[1] "Mean 5.13817856130947 , SD: 0.0716785842473469 , Mean + SD: 5.20985714555681"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average")])
[1] "Mean 5.215868046915 , SD: 0.0765190804551457 , Mean + SD: 5.29238712737014"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban")])
[1] "Mean 5.1564082999918 , SD: 0.0829912635065974 , Mean + SD: 5.2393995634984"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta")])
[1] "Mean 5.14814218492991 , SD: 0.0749108151793109 , Mean + SD: 5.22305300010922"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average")])
[1] "Mean 5.23244059006495 , SD: 0.0774719747962131 , Mean + SD: 5.30991256486116"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space")])
[1] "Mean 5.23874499037273 , SD: 0.0863811628674503 , Mean + SD: 5.32512615324018"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_open_public_spaces")])
[1] "Mean 5.35567550025148 , SD: 0.0933112444550703 , Mean + SD: 5.44898674470655"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_open_public_spaces", "mean_population_exposure_to_pm2_5_2019")])
[1] "Mean 5.36212621452981 , SD: 0.0841422083666618 , Mean + SD: 5.44626842289647"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_open_public_spaces", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density")])
[1] "Mean 5.38148077142908 , SD: 0.0792499982773259 , Mean + SD: 5.46073076970641"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_open_public_spaces", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "shrubs")])
[1] "Mean 5.37060887208482 , SD: 0.100105116953198 , Mean + SD: 5.47071398903801"
# "city_ndvi" was listed twice in the original column vector; it now appears once so the same predictor is not supplied twice.
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "region_100km_cultivated", "city_ndvi", "biome_name", "region_20km_average_pop_density", "permanent_water", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces_and_streets", "region_50km_ndvi", "temperature_annual_average", "region_100km_urban", "region_20km_elevation_delta", "rainfall_annual_average", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_open_public_spaces", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "shrubs", "city_susm")])
[1] "Mean 5.47535632462927 , SD: 0.0825514964435405 , Mean + SD: 5.55790782107281"
Selected Birdlife predictors (the two-variable model has the lowest mean OOB error, 4.85): “population_growth”, “region_50km_ssm”
| Summary of selected predictors |
|---|
| Merlin: “region_50km_ssm”, “region_50km_elevation_delta”, “biome_name”, “city_gdp_per_population”. Birdlife: “population_growth”, “region_50km_ssm”. |
library(boot)
Attaching package: ‘boot’
The following object is masked from ‘package:car’:
logit
The following object is masked from ‘package:survival’:
aml
# Merlin data with the city `name` column kept; TRUE is spelled out because `T` is an ordinary, reassignable variable.
merlin_city_data_named <- fetch_city_data_for('merlin', include_city_name = TRUE)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
# Load the Birdlife city data set via the project helper (defined elsewhere in this file).
# The flag is spelled TRUE rather than T, because T is an ordinary variable that can be reassigned.
# NOTE(review): the second argument presumably asks for the city `name` column to be kept — confirm.
birdlife_city_data_named <- fetch_city_data_for("birdlife", TRUE)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
| Use cross-validation, dropping one term at a time, to find the best model |
full model: response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + city_gdp_per_population + population_growth
# Merlin, full model: cross-validated prediction error (first element of cv.glm's delta).
# K is left at cv.glm's default.
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 19.47322
– CVE 19.47322 – Can we drop one?
# Merlin, round 1: refit the full model with each term left out in turn and compare CV error.
# Without region_50km_ssm
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta + biome_name + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 19.7365
# Without region_50km_elevation_delta
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + biome_name + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 19.68562
# Without biome_name
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.35732
# Without city_gdp_per_population
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 19.39392
# Without population_growth
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + city_gdp_per_population,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 19.12951
– drop biome_name to give smaller CVE of 18.35732 – can we drop another?
# Merlin, round 2 (biome_name already removed): leave out each remaining term in turn.
# Without region_50km_ssm
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.49017
# Without region_50km_elevation_delta
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + city_gdp_per_population + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.60964
# Without city_gdp_per_population
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + population_growth,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.26184
# Without population_growth
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + city_gdp_per_population,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.02666
– drop population_growth to give CVE of 18.02666 – can we drop another?
# Merlin, round 3 (biome_name and population_growth removed): leave out each remaining term in turn.
# Without region_50km_ssm
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta + city_gdp_per_population,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.15699
# Without region_50km_elevation_delta
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + city_gdp_per_population,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.29845
# Without city_gdp_per_population
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 17.9362
– drop city_gdp_per_population to give CVE of 17.9362 – can we drop another?
# Merlin, round 4: the two single-predictor models.
# region_50km_elevation_delta only
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.04985
# region_50km_ssm only
cv.glm(
  data = merlin_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm,
    data = merlin_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 18.241
| – best model with region_50km_ssm + region_50km_elevation_delta (CV error 17.9362) |
# Coefficient summary for the selected Merlin model.
# NOTE(review): the cross-validation above used merlin_city_data_fixed_no_boreal, whereas this
# fit uses merlin_city_data_fixed — confirm the different data frame is intended.
summary(
  glm(
    response ~ region_50km_ssm + region_50km_elevation_delta,
    data = merlin_city_data_fixed
  )
)
Call:
glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta,
data = merlin_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.6984 -2.8713 -0.5247 1.7119 16.9525
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6583859 1.1311108 2.350 0.0202 *
region_50km_ssm -0.1288796 0.0689039 -1.870 0.0636 .
region_50km_elevation_delta -0.0007078 0.0003457 -2.047 0.0426 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.38331)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2329.4 on 134 degrees of freedom
AIC: 784.96
Number of Fisher Scoring iterations: 2
# Birdlife, full model: cross-validated prediction error (first element of cv.glm's delta).
# K is left at cv.glm's default.
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.755749
– can we drop a variable?
# Birdlife, round 1: refit the full model with each term left out in turn and compare CV error.
# Without region_50km_ssm
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta + biome_name + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.701032
# Without region_50km_elevation_delta
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + biome_name + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.906615
# Without biome_name
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.455193
# Without city_gdp_per_population
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.768164
# Without population_growth
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + city_gdp_per_population,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.63928
– drop biome_name to give CVE of 6.455193 – can we drop another?
# Birdlife, round 2 (biome_name already removed): leave out each remaining term in turn.
# Without region_50km_ssm
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_elevation_delta + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.515736
# Without region_50km_elevation_delta
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.38495
# Without city_gdp_per_population
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.417311
# Without population_growth
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + region_50km_elevation_delta + city_gdp_per_population,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.392071
– drop region_50km_elevation_delta to give CVE of 6.38495 – can we drop another?
# Birdlife, round 3 (biome_name and region_50km_elevation_delta removed): leave out each remaining term in turn.
# Without region_50km_ssm
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ city_gdp_per_population + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.476147
# Without city_gdp_per_population
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + population_growth,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.342025
# Without population_growth
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm + city_gdp_per_population,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.331564
– drop population_growth to give CVE of 6.331564 – can we drop another?
# Birdlife, round 4: the two single-predictor models.
# city_gdp_per_population only
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ city_gdp_per_population,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.414695
# region_50km_ssm only
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ region_50km_ssm,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.291299
– drop city_gdp_per_population to give CVE of 6.291299 – is this better than no variable?
# Birdlife, intercept-only baseline: CV error with no predictors, for comparison with the
# single-predictor model above.
cv.glm(
  data = birdlife_city_data_fixed_no_boreal,
  glmfit = glm(
    response ~ 1,
    data = birdlife_city_data_fixed_no_boreal
  )
)$delta[1]
[1] 6.395701
– yes, just!
| – so best model with birdlife is region_50km_ssm |
# Coefficient summary for the selected Birdlife model.
# NOTE(review): the cross-validation above used birdlife_city_data_fixed_no_boreal, whereas this
# fit uses birdlife_city_data_fixed — confirm the different data frame is intended.
summary(
  glm(
    response ~ region_50km_ssm,
    data = birdlife_city_data_fixed
  )
)
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5353 -1.5461 -0.4124 1.3071 10.7572
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.26916 0.65041 1.951 0.0531 .
region_50km_ssm -0.08499 0.04115 -2.065 0.0408 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.214378)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 838.94 on 135 degrees of freedom
AIC: 643.06
Number of Fisher Scoring iterations: 2
| Let's look at SSM for both pools |
# Merlin: response against regional (50km) SSM with a fitted glm trend line and no
# confidence band. `se` is spelled FALSE rather than F, because F can be reassigned.
ggplot(merlin_city_data_fixed_no_boreal, aes(x = region_50km_ssm, y = response)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
# Birdlife: response against regional (50km) SSM with a fitted glm trend line and no
# confidence band. `se` is spelled FALSE rather than F, because F can be reassigned.
ggplot(birdlife_city_data_fixed_no_boreal, aes(x = region_50km_ssm, y = response)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
| and include region_50km_elevation_delta for merlin |
# Merlin: response against regional (50km) SSM, with point size mapped to
# region_50km_elevation_delta, plus a fitted glm trend line and no confidence band.
# `se` is spelled FALSE rather than F, because F can be reassigned.
ggplot(
  merlin_city_data_fixed_no_boreal,
  aes(x = region_50km_ssm, y = response, size = region_50km_elevation_delta)
) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
| Check birdlife model fit |
# Fit the selected Birdlife model (response on regional SSM) to the no-boreal data and
# print its coefficient table.
birdlife.fit <- glm(
  response ~ region_50km_ssm,
  data = birdlife_city_data_fixed_no_boreal
)
summary(object = birdlife.fit)
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed_no_boreal)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5693 -1.5460 -0.4392 1.2913 10.7264
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.23286 0.65157 1.892 0.0606 .
region_50km_ssm -0.08135 0.04133 -1.969 0.0511 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.216274)
Null deviance: 857.07 on 135 degrees of freedom
Residual deviance: 832.98 on 134 degrees of freedom
AIC: 638.43
Number of Fisher Scoring iterations: 2
# Proportion of the null deviance explained by the Birdlife model (1 - residual / null).
birdlife.fit.summary <- summary(birdlife.fit)
1 - birdlife.fit.summary$deviance / birdlife.fit.summary$null.deviance
[1] 0.02810766
# Base-graphics diagnostic plots for the fitted glm.
plot(x = birdlife.fit)
# Birdlife figure: city response against regional (50km) SSM with a fitted glm trend line;
# three cities are highlighted and labelled in red.
# NOTE(review): rows 16, 53 and 72 are picked by position — confirm they still point at the
# intended cities if the data frame is re-ordered or re-filtered.
birdlife_labelled_cities <- birdlife_city_data_fixed_no_boreal[c(16, 53, 72), ]
ggplot(birdlife_city_data_fixed_no_boreal, aes(x = region_50km_ssm, y = response)) +
  geom_point(size = 1) +
  geom_smooth(method = "glm", se = FALSE) +
  # `position = "dodge"` was dropped from the labels: with no width defined it only raised
  # "Width not defined" warnings and did not move the text.
  geom_text(
    aes(label = name),
    data = birdlife_labelled_cities,
    size = 3, vjust = "inward", hjust = "inward", color = "red", angle = -15
  ) +
  geom_point(data = birdlife_labelled_cities, color = "red") +
  theme_bw() +
  ylab("City Random Effect Intercept") +
  xlab("Regional (50km) SSM") +
  labs(title = "Birdlife")
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
# Write the most recently displayed plot to disk (ggsave's default `plot` is last_plot()).
ggsave(
  filename = "city_effect_richness__output__birdlife.jpg",
  plot = last_plot()
)
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
| Check Merlin model fit |
# Fit the selected Merlin model (response on regional SSM and elevation delta) to the
# no-boreal data and print its coefficient table.
merlin.fit <- glm(
  response ~ region_50km_ssm + region_50km_elevation_delta,
  data = merlin_city_data_fixed_no_boreal
)
summary(object = merlin.fit)
Call:
glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta,
data = merlin_city_data_fixed_no_boreal)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.7040 -2.8410 -0.5643 1.7350 16.9652
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6290037 1.1334734 2.319 0.0219 *
region_50km_ssm -0.1238835 0.0693129 -1.787 0.0762 .
region_50km_elevation_delta -0.0007285 0.0003473 -2.097 0.0378 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.43624)
Null deviance: 2458.9 on 135 degrees of freedom
Residual deviance: 2319.0 on 133 degrees of freedom
AIC: 779.68
Number of Fisher Scoring iterations: 2
# Proportion of the null deviance explained by the Merlin model (1 - residual / null).
merlin.fit.summary <- summary(merlin.fit)
1 - merlin.fit.summary$deviance / merlin.fit.summary$null.deviance
[1] 0.05688841
# Base-graphics diagnostic plots for the fitted glm.
plot(x = merlin.fit)
# eBird (Merlin) figure: city response against regional (50km) SSM, point size mapped to the
# regional elevation delta, with a fitted glm trend line; three cities are highlighted and
# labelled in red.
# NOTE(review): rows 24, 30 and 42 are picked by position — confirm they still point at the
# intended cities if the data frame is re-ordered or re-filtered.
merlin_labelled_cities <- merlin_city_data_fixed_no_boreal[c(24, 30, 42), ]
ggplot(merlin_city_data_fixed_no_boreal, aes(x = region_50km_ssm, y = response)) +
  geom_point(aes(size = region_50km_elevation_delta)) +
  geom_smooth(method = "glm", se = FALSE) +
  # `position = "dodge"` was dropped from the labels: with no width defined it only raised
  # "Width not defined" warnings and did not move the text.
  geom_text(
    aes(label = name),
    data = merlin_labelled_cities,
    size = 3, vjust = "inward", hjust = "inward", color = "red", angle = -15
  ) +
  geom_point(data = merlin_labelled_cities, color = "red") +
  theme_bw() +
  theme(legend.position = "bottom") +
  ylab("City Random Effect Intercept") +
  xlab("Regional (50km) SSM") +
  labs(title = "eBird") +
  guides(size = guide_legend(title = "Regional (50km) Elevation Delta"))
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
# Write the most recently displayed plot to disk (ggsave's default `plot` is last_plot()).
ggsave(
  filename = "city_effect_richness__output__merlin.jpg",
  plot = last_plot()
)
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
| How much variation have we explained? |
# Store the Merlin model residuals on the data frame, then plot response against residuals
# with a linear trend; the subtitle reports their correlation.
merlin_city_data_fixed_no_boreal$residuals <- resid(merlin.fit)
# Taken after the residuals column is added, because the label layer inherits x = residuals.
# NOTE(review): rows 24, 30 and 42 are picked by position — confirm they are still the intended cities.
merlin_labelled_residuals <- merlin_city_data_fixed_no_boreal[c(24, 30, 42), ]
ggplot(merlin_city_data_fixed_no_boreal, aes(y = response, x = residuals)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(aes(color = realm)) +
  # `position = "dodge"` was dropped from the labels: with no width defined it only raised
  # "Width not defined" warnings and did not move the text.
  geom_text(
    aes(label = name),
    data = merlin_labelled_residuals,
    size = 4, vjust = "inward", hjust = "inward"
  ) +
  labs(
    title = "Merlin",
    subtitle = paste(
      "Correlation",
      cor(merlin_city_data_fixed_no_boreal$residuals, merlin_city_data_fixed_no_boreal$response)
    )
  ) +
  theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
# Store the Birdlife model residuals on the data frame, then plot response against residuals
# with a linear trend; the subtitle reports their correlation.
# The original never assigned this column for Birdlife in the visible code (the Merlin
# counterpart does), so the plot depended on whatever was left in the session.
birdlife_city_data_fixed_no_boreal$residuals <- resid(birdlife.fit)
# Taken after the residuals column is added, because the label layer inherits x = residuals.
# NOTE(review): rows 16, 53 and 72 are picked by position — confirm they are still the intended cities.
birdlife_labelled_residuals <- birdlife_city_data_fixed_no_boreal[c(16, 53, 72), ]
ggplot(birdlife_city_data_fixed_no_boreal, aes(y = response, x = residuals)) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
  geom_point(aes(color = realm)) +
  # `position = "dodge"` was dropped from the labels: with no width defined it only raised
  # "Width not defined" warnings and did not move the text.
  geom_text(
    aes(label = name),
    data = birdlife_labelled_residuals,
    size = 4, vjust = "inward", hjust = "inward"
  ) +
  labs(
    title = "Birdlife",
    subtitle = paste(
      "Correlation",
      cor(birdlife_city_data_fixed_no_boreal$residuals, birdlife_city_data_fixed_no_boreal$response)
    )
  ) +
  theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
| Check AIC |